# !conda install -c plotly plotly -y
# Importando as bibliotecas necessárias
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from matplotlib import rcParams
import seaborn as sns
import plotly
import plotly.graph_objs as go
import plotly.express as px
from plotly import tools
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.tools import FigureFactory as FF
from IPython.core.display import display, HTML
import warnings
import operator
# --- Notebook-wide display configuration ---------------------------------
# IPython magic (only valid inside a Jupyter/IPython session): draw
# matplotlib figures inline, below the cell that produced them.
%matplotlib inline
# Default matplotlib figure size as (width, height).
rcParams['figure.figsize'] = 7.5, 6
# Seaborn theme applied to every matplotlib chart that follows.
sns.set_style('whitegrid')
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides library deprecation warnings.
warnings.filterwarnings('ignore')
# Enable plotly's offline notebook mode.
init_notebook_mode(connected=True)
# Do not truncate the column list when pandas prints a DataFrame.
pd.set_option('display.max_columns', None)
# Load the three survey CSV files (paths are relative to the notebook's
# working directory) in a single unpacking assignment.
mcq, question, text_response = map(
    pd.read_csv,
    (
        '../bases/multiple_choice_responses.csv',
        '../bases/questions_only.csv',
        '../bases/other_text_responses.csv',
    ),
)
# Horizontal bar chart of the nine most frequent answers to Q5.
# `Series.value_counts()` replaces the deprecated top-level
# `pd.value_counts()`; the counts are already integers, so no numeric
# coercion is needed.
c = mcq['Q5'].value_counts()
# Keep the nine most frequent answers, then sort ascending so the largest
# bar ends up at the top of the horizontal chart.
d = c.head(9).sort_values(ascending=True).to_frame(name='count')
plt.barh(d.index, d['count'])
plt.show()
# Restrict the analysis to respondents who answered 'Data Scientist' to Q5.
# `df` is reused by every chart below.
df = mcq[mcq['Q5'] == 'Data Scientist']

# Number of those respondents per Q3 answer. Q3 is fed to plotly as
# 'country names', so it presumably holds the country of residence.
country_dist = df['Q3'].value_counts()
fig = px.choropleth(
    country_dist.values,
    locations=country_dist.index,
    locationmode='country names',
    color=country_dist.values,
    color_continuous_scale=px.colors.sequential.OrRd,
)
fig.update_layout(title="Distribuição dos Cientistas de Dados pelo Globo")

# Write the figure to a standalone HTML file ...
plot(fig, filename='figure.html')
# ... and embed that file in the notebook. `filename=` is required here:
# a positional string is treated as raw HTML *content*, which would render
# the literal text "figure.html" instead of the map.
display(HTML(filename='figure.html'))
# Vertical bar chart: one bar per distinct Q2 answer, bar height = number
# of respondents in `df` who gave it.
gen = df['Q2'].value_counts()
plt.bar(gen.index, gen.values)
plt.show()
# Line plot (black circles joined by a solid line) of how many respondents
# in `df` gave each Q1 answer. The counts are computed once and reused.
q1_counts = df['Q1'].value_counts()
texto = q1_counts.index
valor = q1_counts.values
# NOTE(review): points are drawn in descending-frequency order (the order
# value_counts returns), not in category order - confirm this is intended.
plt.plot(texto, valor, '-ok')
# Finish the figure explicitly, like every other chart in this file, so the
# next chart is not drawn onto the same axes when run outside a notebook.
plt.show()
# Pie chart of Q4 answers. Any answer given by fewer than 760 respondents is
# pooled into a single 'Outros' slice, which is always appended last.
reg = df['Q4'].value_counts(sort=True)

is_minor = reg < 760
major = reg[~is_minor]
outros = reg[is_minor].sum()

texto = [*major.index, 'Outros']
valor = [*major.values, outros]

plt.pie(valor, labels=texto, autopct='%1.1f%%', shadow=True, startangle=90)
plt.show()
# Horizontal bar chart of the Q13 answers (titled as MOOC platforms).
# Each of the 12 `Q13_Part_<n>` columns contributes its most frequent value
# and that value's count.
mooc = {}
for part in range(1, 13):
    # Count once per column instead of once for the label and once for the
    # value.
    counts = df[f'Q13_Part_{part}'].value_counts()
    if counts.empty:
        # Nobody in `df` answered this part; indexing [0] would raise.
        continue
    mooc[counts.index[0]] = counts.iloc[0]

# Order by count, ascending, so the largest bar is drawn at the top.
mooc = dict(sorted(mooc.items(), key=operator.itemgetter(1)))
plt.barh(y=list(mooc.keys()), width=list(mooc.values()), color='#03396c')
plt.title('MOOC (Massive Open Online Courses)')
plt.show()
# Horizontal bar chart of the Q12 answers (titled as community pages).
# Each of the 12 `Q12_Part_<n>` columns contributes its most frequent value
# and that value's count.
pub = {}
for part in range(1, 13):
    # Count once per column instead of once for the label and once for the
    # value.
    counts = df[f'Q12_Part_{part}'].value_counts()
    if counts.empty:
        # Nobody in `df` answered this part; indexing [0] would raise.
        continue
    pub[counts.index[0]] = counts.iloc[0]

# Order by count, ascending, so the largest bar is drawn at the top.
pub = dict(sorted(pub.items(), key=operator.itemgetter(1)))
plt.barh(y=list(pub.keys()), width=list(pub.values()))
plt.title('Páginas da Comunidade')
plt.show()
# Horizontal bar chart of the Q9 answers: for each of the 8
# `Q9_Part_<n>` columns, plot its most frequent value and that value's
# count, in column order.
texto = []
valor = []
for part in range(1, 9):
    # Count once per column instead of once for the label and once for the
    # value.
    counts = df[f'Q9_Part_{part}'].value_counts()
    if counts.empty:
        # Nobody in `df` answered this part; indexing [0] would raise.
        continue
    texto.append(counts.index[0])
    valor.append(counts.iloc[0])

plt.barh(y=texto, width=valor)
plt.show()
# Vertical bar chart of Q15 answers; tick labels are rotated so they do not
# overlap.
q15_counts = df['Q15'].value_counts()
anos, valor = q15_counts.index, q15_counts.values
plt.bar(anos, valor)
plt.xticks(rotation='vertical')
plt.show()
# Horizontal bar chart: one bar per distinct Q14 answer, bar length = number
# of respondents in `df` who gave it.
q14_counts = df['Q14'].value_counts()
tool, value2 = q14_counts.index, q14_counts.values
plt.barh(tool, value2)
plt.show()
# Line plot (black circles joined by a solid line) of the Q20 answers: for
# each of the 12 `Q20_Part_<n>` columns, plot its most frequent value and
# that value's count, in column order.
vis = []
value3 = []
for part in range(1, 13):
    # Count once per column instead of once for the label and once for the
    # value.
    counts = df[f'Q20_Part_{part}'].value_counts()
    if counts.empty:
        # Nobody in `df` answered this part; indexing [0] would raise.
        continue
    vis.append(counts.index[0])
    value3.append(counts.iloc[0])

plt.plot(vis, value3, '-ok')
plt.xticks(rotation='vertical')
# Finish the figure explicitly, like the other charts in this file.
plt.show()
# Até a próxima — Fernando Anselmo